Author

Alex Goodman

Code
library(tidyverse)
library(here)
library(gt)
library(DT)
library(kableExtra)
Code
# Read the name records from the Lab 9 CSV file and display them as an
# interactive HTML table.
namesA <- readr::read_csv(
  here::here("Week 9", "Lab 9", "StateNames_A.csv")
)
DT::datatable(namesA)
Code
# Total number of babies named "Allison" in each state, split by sex.
# The result has one row per state and one column per sex; a state/sex
# combination with no records is filled with 0.
namesAllison <- namesA |>
  filter(Name == "Allison") |>
  rename(sex = Gender) |>
  select(Name, sex, State, Count) |>
  group_by(State, sex) |>
  # Drop the grouping here so the result is a plain (ungrouped) tibble and
  # summarize() does not emit its "grouped output by 'State'" message.
  summarize(total_names = sum(Count), .groups = "drop") |>
  # names_sort = TRUE pins the sex columns to the order F, M, so they always
  # line up with the hard-coded header labels given to kable() below.
  pivot_wider(
    names_from = sex,
    values_from = total_names,
    values_fill = 0,
    names_sort = TRUE
  )

# Female counts only.
namesAllisonF <- namesAllison |>
  select(State, `F`)

# Render the state-by-sex counts as an HTML table.
namesAllison |>
  knitr::kable(
    format = "html",
    digits = 3,
    col.names = c("State", "F", "M"),
    caption = "Number of Allisons in the U.S by State and Sex"
  )
Number of Allisons in the U.S by State and Sex
State F M
AK 232 0
AL 1535 0
AR 1198 0
AZ 1880 0
CA 12413 0
CO 1594 0
CT 1099 0
DC 321 0
DE 294 0
FL 4455 0
GA 3257 0
HI 183 0
IA 1477 0
ID 451 0
IL 5110 0
IN 3067 0
KS 1283 0
KY 1905 20
LA 1209 0
MA 2218 0
MD 2229 0
ME 340 0
MI 4014 0
MN 2374 0
MO 2882 0
MS 817 0
MT 226 0
NC 3435 0
ND 285 0
NE 807 0
NH 412 0
NJ 3052 0
NM 399 0
NV 729 0
NY 5747 0
OH 5487 0
OK 1421 0
OR 1186 0
PA 4307 0
RI 306 0
SC 1228 0
SD 376 0
TN 2488 0
TX 10192 0
UT 1125 0
VA 3220 0
VT 135 0
WA 1956 0
WI 2367 0
WV 813 0
WY 142 0
Code
# Keep only the "Allison" records; the Gender column is renamed to sex.
names_viz <- namesA |>
  rename(sex = Gender) |>
  filter(Name == "Allison")

# Yearly total of Allisons, summed over every remaining record for that year.
my_viz <- names_viz |>
  group_by(Year) |>
  summarise(total_names = sum(Count))

# Line-and-point chart of the yearly totals. The y-axis label is removed
# (y = NULL); the title names the measure instead.
ggplot(my_viz, aes(x = Year, y = total_names)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Popularity of the name 'Allison' in the U.S. by Count",
    y = NULL
  )

Code
# Simple linear regression of the yearly Allison total on Year.
allison_lm <- lm(total_names ~ Year, data = my_viz)

# Coefficient table for the fitted model.
allison_lm |>
  broom::tidy()
# A tibble: 2 × 5
  term        estimate std.error statistic  p.value
  <chr>          <dbl>     <dbl>     <dbl>    <dbl>
1 (Intercept)  209815.   42883.       4.89 0.000163
2 Year           -102.      21.4     -4.75 0.000217
Code
# Regression equation:
#   y = 209815.052 - 101.581x

# Residuals-versus-fitted plot for allison_lm. The points are also joined by
# a line, drawn on top of the point layer.
broom::augment(allison_lm) |>
  ggplot(aes(x = .fitted, y = .resid)) +
  geom_point() +
  geom_line()

Code
# The residuals trace a roughly U-shaped curve through the middle of the fitted range, with a couple of smaller curves at the edges. It is hard to tell whether they are scattered randomly or follow a nonlinear pattern, so it could go either way; but because of the larger U-shape in the middle, I would say a quadratic or other nonlinear model could fit the data better than a linear one. The estimated slope is negative, so the name 'Allison' appears to be in decline and unfortunately, going by the wording of the question and the model, the name isn't as 'cool' anymore.
Code
# The three spellings of "Allan" to compare.
x <- c("Allan", "Alan", "Allen")

# Male records whose name is any of the three spellings.
# Membership is tested with %in%: comparing Name with `==` against a vector of
# a different length recycles that vector element by element, so rows would be
# kept or dropped according to their position rather than their name.
allans <- namesA |>
  filter(Gender == "M", Name %in% x) |>
  rename(sex = Gender)

# Yearly total across the three spellings combined.
my_viz2 <- allans |>
  group_by(Year) |>
  summarize(total_names = sum(Count))

# Line-and-point chart of the combined yearly totals; the y-axis label is
# removed (y = NULL).
ggplot(data = my_viz2, mapping = aes(x = Year, y = total_names)) +
  geom_line() +
  geom_point() +
  labs(
    y = NULL,
    title = "Popularity of the names 'Allan', 'Alan', and 'Allen' in the U.S. by Count"
  )

Code
# 9

# Share of each spelling (Alan / Allan / Allen) among the records for those
# three names in the year 2000, computed separately for CA and PA.
# The result has one row per state and one proportion column per spelling.
allans2 <- namesA |>
  rename(sex = Gender) |>
  filter(
    Year == 2000,
    Name %in% c("Allan", "Alan", "Allen"),
    State %in% c("PA", "CA")
  ) |>
  select(Name, sex, State, Count) |>
  group_by(Name, State) |>
  summarize(total_names = sum(Count), .groups = "drop") |>
  # Turn the counts into within-state proportions, then drop the grouping so
  # the result is a plain (ungrouped) tibble.
  group_by(State) |>
  mutate(total_names = total_names / sum(total_names)) |>
  ungroup() |>
  # names_sort = TRUE pins the spelling columns to alphabetical order
  # (Alan, Allan, Allen), matching the header labels given to kable() below.
  pivot_wider(
    names_from = Name,
    values_from = total_names,
    values_fill = 0,
    names_sort = TRUE
  )

# percentage breakdown: from CA: 66% Alan, 15% Allan, 20% Allen. from PA: 43% Alan, 10% Allan, 47% Allen. 

# Render the proportions as a styled HTML table (larger font, classic theme
# with the verdana font).
allans2 |>
  knitr::kable(
    format = "html",
    digits = 3,
    col.names = c("State", "Alan", "Allan", "Allen"),
    caption = "Proportion of Allans, Allens, and Alans in CA and PA"
  ) |>
  kableExtra::kable_styling(font_size = 20) |>
  kableExtra::kable_classic(html_font = "verdana")
Proportion of Allans, Allens, and Alans in CA and PA
State Alan Allan Allen
CA 0.655 0.147 0.198
PA 0.429 0.101 0.471

Challenge 9

Part 1 - completed above. Part 2 - I used the font size option from the kableExtra package for the last table. I also changed the font to Verdana with the kable_classic() theming option, and added a title. Part 3 - added at the beginning.